#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""BeautifulSoup4 test.

Parses a small inline HTML sample with bs4 and prints the tags that match
a lookup by id.
"""
# The docstring has to be the first statement of the module to be picked up
# as __doc__; module metadata such as __author__ follows it.
__author__ = 'poppy'
# Sample HTML document used as the parser input below. It holds a <title>,
# a "title" paragraph, a "story" paragraph containing three <a class="sister">
# links (ids link1, link2, link3), an empty <b>, a line of bare text, and a
# final "story" paragraph. There is no <body> tag and <html> is never closed.
html_doc = """
<html><head><title>The Dormouse's story</title></head>

<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<b></b>
bbb
<p class="story">...</p>
"""

from bs4 import BeautifulSoup
import re
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (recent versions also warn about this), so the resulting
# tree can differ from machine to machine.
# NOTE: the stdlib "html.parser" does not add a missing <body> element, so
# soup.body is None for this document, unlike with lxml or html5lib.
soup = BeautifulSoup(html_doc, "html.parser")
# print soup.head
# print soup.title
# print soup.body.b
# for a in soup.find_all('a'):
#     print a
# head_tag = soup.head
# print head_tag.contents
# title_tag = head_tag.contents[0]
# text = title_tag.contents[0]
# print text
# 
# for child in title_tag.children:
#     print child
# Print the list of tags whose id attribute is "link1". The call form with a
# single argument prints the same text under Python 2 and is valid Python 3,
# where the bare print statement is a SyntaxError.
print(soup.find_all(id="link1"))
# import re
# for tag in soup.find_all(re.compile("^b")):
#     print '===',tag.name
#     
# for tag in soup.find_all(re.compile("t")):
#     print(tag)